\section{Master Equation Well-Posedness and Stochastic Foundations}
\label{app:master-equation}

This appendix provides rigorous mathematical foundations for the continuous-time master equation formulation of hierarchical rule dynamics introduced in Section 2.2. We establish well-posedness, probability conservation, non-negativity preservation, existence and uniqueness of solutions, and the connection between Q-matrix structure and stochastic dynamics.

\subsection{Mathematical Preliminaries}

Let $\mathcal{R}_\ell = \{r_1, r_2, \ldots, r_{N_\ell}\}$ denote the finite rule vocabulary at level $\ell$, with $|\mathcal{R}_\ell| = N_\ell$. For each agent $i \in V_\ell$, the rule occupation probability vector is
\begin{equation}
    \mathbf{p}_i(t) = \bigl(p_i^{(r_1)}(t), p_i^{(r_2)}(t), \ldots, p_i^{(r_{N_\ell})}(t)\bigr)^\top \in \mathbb{R}^{N_\ell},
\end{equation}
where $p_i^{(r)}(t) = \Prob(r_i(t) = r \mid \mathcal{F}_0)$ represents the probability that agent $i$ employs rule $r$ at time $t$ given initial conditions $\mathcal{F}_0$.

The probability simplex $\Delta^{N_\ell - 1}$ is defined as
\begin{equation}
    \Delta^{N_\ell - 1} = \left\{\mathbf{p} \in \mathbb{R}^{N_\ell} : p^{(r)} \geq 0 \text{ for all } r, \quad \sum_{r \in \mathcal{R}_\ell} p^{(r)} = 1\right\}.
\end{equation}

\subsection{The Q-Matrix and Generator Structure}

\begin{definition}[Q-Matrix]
\label{def:qmatrix}
The infinitesimal generator (Q-matrix) for agent $i$ at level $\ell$ is the $N_\ell \times N_\ell$ matrix $Q^{(i,\ell)}$ with entries
\begin{equation}
    Q_{rr'}^{(i,\ell)} = \begin{cases}
        W_{r \to r'}^{(i,\ell)} & \text{if } r \neq r' \quad \text{(off-diagonal)} \\
        -\sum_{r'' \neq r} W_{r \to r''}^{(i,\ell)} & \text{if } r = r' \quad \text{(diagonal)}
    \end{cases},
    \label{eq:qmatrix-def}
\end{equation}
where transition rates $W_{r \to r'}^{(i,\ell)} \geq 0$ are given by
\begin{equation}
    W_{r \to r'}^{(i,\ell)} = \lambda_0 \exp\Bigl(\beta_\ell \Delta Q_\ell^{(i)}(r \to r') + \lambda_\ell^\top \Delta g_\ell^{(i)}(r \to r')\Bigr) \cdot \mathbb{1}_{g_\ell(r') \leq 0}.
    \label{eq:transition-rates-app}
\end{equation}
\end{definition}

\begin{proposition}[Row-Sum Conservation]
\label{prop:row-sum}
The Q-matrix satisfies row-sum conservation:
\begin{equation}
    \sum_{r' \in \mathcal{R}_\ell} Q_{rr'}^{(i,\ell)} = 0 \quad \text{for all } r \in \mathcal{R}_\ell.
\end{equation}
\end{proposition}

\begin{proof}
For any row $r$:
\begin{align}
    \sum_{r' \in \mathcal{R}_\ell} Q_{rr'}^{(i,\ell)} &= Q_{rr}^{(i,\ell)} + \sum_{r' \neq r} Q_{rr'}^{(i,\ell)} \\
    &= -\sum_{r'' \neq r} W_{r \to r''}^{(i,\ell)} + \sum_{r' \neq r} W_{r \to r'}^{(i,\ell)} \\
    &= 0.
\end{align}
The conservation property holds by definition of the diagonal entries.
\end{proof}

\subsection{Master Equation Formulation}

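The continuous-time master equation (Kolmogorov forward equation) governing the evolution of rule probabilities is
\begin{equation}
    \frac{\diff \mathbf{p}_i}{\diff t} = \bigl(Q^{(i,\ell)}\bigr)^\top \mathbf{p}_i(t),
    \label{eq:master-equation-vector}
\end{equation}
where the transpose appears because $Q_{rr'}^{(i,\ell)}$ is the rate from $r$ to $r'$ (\Cref{def:qmatrix}) while $\mathbf{p}_i$ is a column vector; equivalently, in component form: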
\begin{equation}
    \frac{\diff p_i^{(r)}}{\diff t} = \sum_{r' \neq r} W_{r' \to r}^{(i,\ell)} p_i^{(r')}(t) - \sum_{r' \neq r} W_{r \to r'}^{(i,\ell)} p_i^{(r)}(t).
    \label{eq:master-equation-components}
\end{equation}

The first term represents probability flowing \emph{into} state $r$ from other states $r'$, while the second term represents probability flowing \emph{out of} state $r$ to other states.

\subsection{Well-Posedness Theorem}

\begin{theorem}[Well-Posedness of Master Equation]
\label{thm:master-wellposed}
Let $Q^{(i,\ell)}$ be the Q-matrix defined in \Cref{def:qmatrix} with transition rates satisfying $W_{r \to r'}^{(i,\ell)} < \infty$ for all $r, r'$. Then the initial value problem
\begin{equation}
    \begin{cases}
        \frac{\diff \mathbf{p}_i}{\diff t} = \bigl(Q^{(i,\ell)}\bigr)^\top \mathbf{p}_i(t), & t \geq 0 \\
        \mathbf{p}_i(0) = \mathbf{p}_0 \in \Delta^{N_\ell - 1}
    \end{cases}
    \label{eq:ivp}
\end{equation}
admits a unique solution $\mathbf{p}_i: [0,\infty) \to \Delta^{N_\ell - 1}$ satisfying:
\begin{enumerate}
    \item \textbf{Existence and uniqueness}: There exists a unique continuously differentiable solution $\mathbf{p}_i(t)$ for all $t \geq 0$.
    \item \textbf{Probability conservation}: $\sum_{r \in \mathcal{R}_\ell} p_i^{(r)}(t) = 1$ for all $t \geq 0$.
    \item \textbf{Non-negativity preservation}: If $p_i^{(r)}(0) \geq 0$ for all $r$, then $p_i^{(r)}(t) \geq 0$ for all $r$ and all $t \geq 0$.
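    \item \textbf{Regularity}: The solution has the semigroup representation $\mathbf{p}_i(t) = e^{t(Q^{(i,\ell)})^\top} \mathbf{p}_0$, where $e^{tA}$ denotes the matrix exponential of $tA$ and the transpose reflects that $Q_{rr'}^{(i,\ell)}$ is the rate from $r$ to $r'$.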
\end{enumerate}
\end{theorem}

\begin{proof}
We prove each component separately.

\textbf{Part 1: Existence and uniqueness.}

The master equation \eqref{eq:master-equation-vector}, whose component form is \eqref{eq:master-equation-components}, is a linear ordinary differential equation with constant coefficients; read off from the component form, its coefficient matrix is $(Q^{(i,\ell)})^\top$, because $Q_{rr'}^{(i,\ell)}$ is the rate from $r$ to $r'$. Since $Q^{(i,\ell)}$ is a finite-dimensional matrix with bounded entries (all transition rates are finite), the right-hand side is globally Lipschitz and the Picard--Lindelöf theorem guarantees existence and uniqueness of solutions for all $t \geq 0$. The solution can be expressed via the matrix exponential:
\begin{equation}
    \mathbf{p}_i(t) = \exp\bigl(t (Q^{(i,\ell)})^\top\bigr) \mathbf{p}_0 = \sum_{k=0}^\infty \frac{t^k}{k!} \bigl((Q^{(i,\ell)})^\top\bigr)^k \mathbf{p}_0,
\end{equation}
which converges absolutely for all $t \geq 0$ since $\|Q^{(i,\ell)}\| < \infty$.

\textbf{Part 2: Probability conservation.}

Let $\mathbf{1} = (1, 1, \ldots, 1)^\top \in \mathbb{R}^{N_\ell}$ denote the vector of ones. We prove that $\mathbf{1}^\top \mathbf{p}_i(t) = 1$ for all $t \geq 0$.

Define $S(t) = \sum_{r \in \mathcal{R}_\ell} p_i^{(r)}(t) = \mathbf{1}^\top \mathbf{p}_i(t)$. Taking the time derivative and inserting the component form \eqref{eq:master-equation-components}:
\begin{align}
    \frac{\diff S}{\diff t} &= \sum_{r \in \mathcal{R}_\ell} \frac{\diff p_i^{(r)}}{\diff t} = \sum_{r \in \mathcal{R}_\ell} \Bigl[\sum_{r' \neq r} W_{r' \to r}^{(i,\ell)} p_i^{(r')}(t) - \sum_{r' \neq r} W_{r \to r'}^{(i,\ell)} p_i^{(r)}(t)\Bigr] \\
    &= \sum_{r' \in \mathcal{R}_\ell} \Bigl(\sum_{r \neq r'} W_{r' \to r}^{(i,\ell)}\Bigr) p_i^{(r')}(t) + \sum_{r' \in \mathcal{R}_\ell} Q_{r'r'}^{(i,\ell)} p_i^{(r')}(t) \\
    &= \sum_{r' \in \mathcal{R}_\ell} \left(\sum_{r \in \mathcal{R}_\ell} Q_{r'r}^{(i,\ell)}\right) p_i^{(r')}(t).
\end{align}

The inner sum runs over the second index of $Q^{(i,\ell)}$, so it is the sum of row $r'$ (not a column sum); in matrix form, $\frac{\diff S}{\diff t} = (Q^{(i,\ell)} \mathbf{1})^\top \mathbf{p}_i(t)$. By \Cref{prop:row-sum}, each row sum vanishes:
\begin{align}
    \sum_{r \in \mathcal{R}_\ell} Q_{r'r}^{(i,\ell)} &= \sum_{r \neq r'} W_{r' \to r}^{(i,\ell)} + Q_{r'r'}^{(i,\ell)} \\
    &= \sum_{r \neq r'} W_{r' \to r}^{(i,\ell)} - \sum_{r \neq r'} W_{r' \to r}^{(i,\ell)} \\
    &= 0.
\end{align}

Therefore $\frac{\diff S}{\diff t} = 0$, implying $S(t) = S(0) = \sum_r p_i^{(r)}(0) = 1$ for all $t \geq 0$.

\textbf{Part 3: Non-negativity preservation.}

We prove that if $\mathbf{p}_0 \in \Delta^{N_\ell - 1}$, then $\mathbf{p}_i(t) \in \Delta^{N_\ell - 1}$ for all $t \geq 0$. Since probability conservation is already established, it suffices to show $p_i^{(r)}(t) \geq 0$ for all $r$ and $t \geq 0$.

Suppose for contradiction that there exists a first time $t^* > 0$ and state $r^*$ such that $p_i^{(r^*)}(t^*) = 0$ and $\frac{\diff p_i^{(r^*)}}{\diff t}\big|_{t=t^*} < 0$. At this time, the master equation gives:
\begin{equation}
    \frac{\diff p_i^{(r^*)}}{\diff t}\bigg|_{t=t^*} = \sum_{r' \neq r^*} W_{r' \to r^*}^{(i,\ell)} p_i^{(r')}(t^*) - \underbrace{p_i^{(r^*)}(t^*)}_{=0} \sum_{r' \neq r^*} W_{r^* \to r'}^{(i,\ell)}.
\end{equation}

Since $W_{r' \to r^*}^{(i,\ell)} \geq 0$ and $p_i^{(r')}(t^*) \geq 0$ for all $r' \neq r^*$ (by minimality of $t^*$), we have
\begin{equation}
    \frac{\diff p_i^{(r^*)}}{\diff t}\bigg|_{t=t^*} = \sum_{r' \neq r^*} W_{r' \to r^*}^{(i,\ell)} p_i^{(r')}(t^*) \geq 0,
\end{equation}
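contradicting the assumption that the derivative is negative. The borderline case in which the derivative vanishes at $t^*$ is settled by a direct argument: the linear system \eqref{eq:master-equation-components} has a coefficient matrix $A$ with off-diagonal entries $A_{rr'} = W_{r' \to r}^{(i,\ell)} \geq 0$ and diagonal entries $A_{rr} = -\sum_{r' \neq r} W_{r \to r'}^{(i,\ell)}$, so for $c = \max_r \sum_{r' \neq r} W_{r \to r'}^{(i,\ell)}$ the matrix $A + cI$ is entrywise non-negative, and hence $e^{tA} = e^{-ct} e^{t(A + cI)}$ is entrywise non-negative for every $t \geq 0$. Since $\mathbf{p}_i(t) = e^{tA} \mathbf{p}_0$ with $\mathbf{p}_0 \geq 0$, it follows that $p_i^{(r)}(t) \geq 0$ for all $r$ and $t \geq 0$.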

\textbf{Part 4: Semigroup representation.}

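Because $Q_{rr'}^{(i,\ell)}$ is the rate from $r$ to $r'$, the component form \eqref{eq:master-equation-components} has coefficient matrix $A = (Q^{(i,\ell)})^\top$. The matrix exponential $e^{tA}$ defines a uniformly (hence strongly) continuous semigroup on $\mathbb{R}^{N_\ell}$ with generator $A$. The semigroup property $e^{(t+s)A} = e^{tA} e^{sA}$ holds, and $\mathbf{p}_i(t) = e^{t(Q^{(i,\ell)})^\top} \mathbf{p}_0$ is the unique classical solution satisfying $\frac{\diff \mathbf{p}_i}{\diff t} = (Q^{(i,\ell)})^\top \mathbf{p}_i(t)$ with $\mathbf{p}_i(0) = \mathbf{p}_0$.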
\end{proof}

\subsection{Connection to Continuous-Time Markov Chains}

The master equation formulation has a direct probabilistic interpretation as a continuous-time Markov chain (CTMC) on the finite state space $\mathcal{R}_\ell$.

\begin{theorem}[Stochastic Process Representation]
\label{thm:ctmc}
Let $(r_i(t))_{t \geq 0}$ be a continuous-time stochastic process on $\mathcal{R}_\ell$ with transition rate matrix $Q^{(i,\ell)}$. Then:
\begin{enumerate}
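    \item $(r_i(t))_{t \geq 0}$ is a time-homogeneous CTMC with right-continuous sample paths and, almost surely, only finitely many jumps on every bounded time interval (the chain is non-explosive because the state space is finite and all rates are bounded).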
    \item The holding time in state $r$ follows an exponential distribution with rate $\lambda_r = -Q_{rr}^{(i,\ell)} = \sum_{r' \neq r} W_{r \to r'}^{(i,\ell)}$.
    \item Given a jump from state $r$ (possible only when $-Q_{rr}^{(i,\ell)} > 0$, i.e.\ when $r$ is not absorbing), the next state $r' \neq r$ is selected with probability
    \begin{equation}
        \pi_{r \to r'} = \frac{W_{r \to r'}^{(i,\ell)}}{\sum_{r'' \neq r} W_{r \to r''}^{(i,\ell)}} = \frac{W_{r \to r'}^{(i,\ell)}}{-Q_{rr}^{(i,\ell)}}.
    \end{equation}
    \item The probability distribution $p_i^{(r)}(t) = \Prob(r_i(t) = r)$ satisfies the master equation \eqref{eq:master-equation-components}.
\end{enumerate}
\end{theorem}

\begin{proof}
This is a standard result in continuous-time Markov chain theory. The infinitesimal generator $Q^{(i,\ell)}$ uniquely determines the stochastic process via the Kolmogorov forward equation (master equation). The exponential holding times arise from the memoryless property of continuous-time Markov processes, and the transition probabilities follow from normalizing the off-diagonal rates.
\end{proof}

\subsection{Connection to Discrete-Time Dynamics}

The discrete-time update rule in \Cref{eq:micro-update} emerges as a time-discretization of the continuous master equation.

\begin{proposition}[Discrete-Time Projection]
\label{prop:discrete-projection}
The discrete-time update with time step $\Delta t$ corresponds to the first-order Euler discretization:
\begin{equation}
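    \mathbf{p}_i(t + \Delta t) \approx \mathbf{p}_i(t) + \Delta t \, (Q^{(i,\ell)})^\top \mathbf{p}_i(t) = \bigl(I + \Delta t \, (Q^{(i,\ell)})^\top\bigr) \mathbf{p}_i(t),
\end{equation}
where the transpose reflects that $Q_{rr'}^{(i,\ell)}$ is the rate from $r$ to $r'$ (cf.\ the component form \eqref{eq:master-equation-components}). This step conserves total probability exactly and agrees with the exact solution up to a local error of $O(\Delta t^2)$. The stochastic term $\epsilon_i(t)$ in \Cref{eq:micro-update} arises from sampling a random jump time from the exponential distribution with rate $\lambda_r = -Q_{rr}^{(i,\ell)}$.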
\end{proposition}

\begin{proof}
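By the component form \eqref{eq:master-equation-components}, the exact flow over one step is the matrix exponential of $\Delta t \, (Q^{(i,\ell)})^\top$. Expanding it to first order:
\begin{equation}
    e^{\Delta t \, (Q^{(i,\ell)})^\top} = I + \Delta t \, (Q^{(i,\ell)})^\top + O(\Delta t^2).
\end{equation}
The Euler method approximates $\mathbf{p}_i(t + \Delta t) \approx \bigl(I + \Delta t \, (Q^{(i,\ell)})^\top\bigr) \mathbf{p}_i(t)$. Probability conservation follows from $\mathbf{1}^\top \bigl(I + \Delta t \, (Q^{(i,\ell)})^\top\bigr) = \mathbf{1}^\top$, since $\mathbf{1}^\top (Q^{(i,\ell)})^\top = (Q^{(i,\ell)} \mathbf{1})^\top = \mathbf{0}^\top$ by \Cref{prop:row-sum}. Non-negativity, by contrast, is guaranteed only when $\Delta t \leq 1/\max_r(-Q_{rr}^{(i,\ell)})$, which keeps the diagonal entries $1 + \Delta t \, Q_{rr}^{(i,\ell)}$ non-negative.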

The stochastic innovation $\epsilon_i(t)$ represents the random jump time sampled from $\text{Exp}(\lambda_r)$ during simulation, introducing temporal variability into the discrete updates.
\end{proof}

\subsection{Stationary Distribution and Equilibrium}

Under appropriate ergodicity conditions, the CTMC converges to a unique stationary distribution.

\begin{theorem}[Convergence to Equilibrium]
\label{thm:equilibrium}
Suppose the Q-matrix $Q^{(i,\ell)}$ is irreducible (the hierarchical rule graph is strongly connected) and aperiodic. Then:
\begin{enumerate}
    \item There exists a unique stationary distribution $\boldsymbol{\pi}^* \in \Delta^{N_\ell - 1}$ satisfying $(Q^{(i,\ell)})^\top \boldsymbol{\pi}^* = \mathbf{0}$.
    \item For any initial distribution $\mathbf{p}_0 \in \Delta^{N_\ell - 1}$, we have $\lim_{t \to \infty} \mathbf{p}_i(t) = \boldsymbol{\pi}^*$.
    \item The convergence is exponential with rate determined by the spectral gap $\Delta = -\text{Re}\bigl(\lambda_1(Q^{(i,\ell)})\bigr)$ where $\lambda_1$ is the eigenvalue with second-largest real part:
    \begin{equation}
        \|\mathbf{p}_i(t) - \boldsymbol{\pi}^*\| \leq C e^{-\Delta t} \|\mathbf{p}_0 - \boldsymbol{\pi}^*\|.
    \end{equation}
\end{enumerate}
\end{theorem}

\begin{proof}[Proof Sketch]
Irreducibility ensures the CTMC is ergodic; the aperiodicity assumption is automatically satisfied in continuous time, since $P_{rr}(t) \geq e^{tQ_{rr}^{(i,\ell)}} > 0$ for all $t \geq 0$. The Perron--Frobenius theorem applied to the transition probability matrix $P(t) = e^{tQ^{(i,\ell)}}$ guarantees a unique stationary distribution corresponding to the eigenvalue $\lambda_0 = 0$. All other eigenvalues have strictly negative real parts, with the second eigenvalue satisfying $\text{Re}(\lambda_1) < 0$ and determining the spectral gap $\Delta = -\text{Re}(\lambda_1) > 0$. Convergence follows from spectral decomposition of the matrix exponential.
\end{proof}

\subsection{Detailed Balance and Reversibility}

Under specific conditions on transition rates, the system satisfies detailed balance.

\begin{proposition}[Detailed Balance Condition]
\label{prop:detailed-balance}
If the transition rates satisfy
\begin{equation}
    \frac{W_{r \to r'}^{(i,\ell)}}{W_{r' \to r}^{(i,\ell)}} = \exp\Bigl(\beta_\ell \bigl[Q_\ell(r') - Q_\ell(r)\bigr]\Bigr)
\end{equation}
and the stationary distribution is the Gibbs distribution
\begin{equation}
    \pi^*(r) = \frac{1}{Z} \exp\bigl(\beta_\ell Q_\ell(r)\bigr) \cdot \mathbb{1}_{g_\ell(r) \leq 0},
\end{equation}
then the system satisfies detailed balance:
\begin{equation}
    \pi^*(r) W_{r \to r'}^{(i,\ell)} = \pi^*(r') W_{r' \to r}^{(i,\ell)}.
\end{equation}
This implies the CTMC is reversible and the stationary distribution is the equilibrium distribution.
\end{proposition}

\begin{proof}
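If $r$ or $r'$ is infeasible, both sides of the detailed balance identity vanish, because the indicator in \eqref{eq:transition-rates-app} sets every rate into an infeasible rule to zero and $\pi^*$ assigns that rule zero mass. For feasible $r$ and $r'$ with positive rates, direct verification gives: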
\begin{align}
    \frac{\pi^*(r) W_{r \to r'}^{(i,\ell)}}{\pi^*(r') W_{r' \to r}^{(i,\ell)}} &= \frac{\exp(\beta_\ell Q_\ell(r))}{\exp(\beta_\ell Q_\ell(r'))} \cdot \frac{W_{r \to r'}^{(i,\ell)}}{W_{r' \to r}^{(i,\ell)}} \\
    &= \exp\bigl(\beta_\ell [Q_\ell(r) - Q_\ell(r')]\bigr) \cdot \exp\bigl(\beta_\ell [Q_\ell(r') - Q_\ell(r)]\bigr) \\
    &= 1.
\end{align}
Detailed balance implies the system is reversible and $(Q^{(i,\ell)})^\top \boldsymbol{\pi}^* = \mathbf{0}$.
\end{proof}

\subsection{Spectral Analysis and Relaxation Timescales}

The eigenspectrum of $Q^{(i,\ell)}$ characterizes the multi-timescale relaxation dynamics.

\begin{proposition}[Spectral Decomposition]
\label{prop:spectral-decomp}
Let $Q^{(i,\ell)}$ be an irreducible, diagonalizable Q-matrix with eigenvalues $0 = \lambda_0 > \text{Re}(\lambda_1) \geq \text{Re}(\lambda_2) \geq \cdots \geq \text{Re}(\lambda_{N_\ell-1})$ and corresponding right eigenvectors $\{\mathbf{v}_k\}_{k=0}^{N_\ell-1}$. Then:
\begin{enumerate}
    \item The matrix exponential admits the decomposition
    \begin{equation}
        e^{tQ^{(i,\ell)}} = \sum_{k=0}^{N_\ell-1} e^{t\lambda_k} \mathbf{v}_k \mathbf{w}_k^\top,
    \end{equation}
    where $\{\mathbf{w}_k\}$ are left eigenvectors satisfying $\mathbf{w}_k^\top Q^{(i,\ell)} = \lambda_k \mathbf{w}_k^\top$, normalized so that $\mathbf{w}_k^\top \mathbf{v}_j = \delta_{kj}$.
    \item The relaxation timescale for mode $k$ is $\tau_k = -1/\text{Re}(\lambda_k)$.
    \item The slowest relaxation timescale is $\tau_1 = -1/\text{Re}(\lambda_1)$, the inverse of the spectral gap, which determines the mixing time.
\end{enumerate}
\end{proposition}

\subsection{Summary and Implications}

The master equation formulation provides a rigorous foundation for hierarchical rule dynamics with the following key properties:

\begin{itemize}
    \item \textbf{Well-posedness}: Guaranteed by \Cref{thm:master-wellposed} via standard ODE theory.
    \item \textbf{Probability conservation}: Ensured by row-sum conservation $\sum_{r'} Q_{rr'}^{(i,\ell)} = 0$.
    \item \textbf{Non-negativity}: Preserved by the structure of the master equation (inflow from non-negative states).
    \item \textbf{Stochastic interpretation}: Direct connection to CTMCs via \Cref{thm:ctmc} with exponentially distributed holding times.
    \item \textbf{Discrete-time emergence}: The update rule \eqref{eq:micro-update} arises as Euler discretization (\Cref{prop:discrete-projection}).
    \item \textbf{Equilibrium convergence}: Exponential relaxation to stationary distribution with rate determined by spectral gap (\Cref{thm:equilibrium}).
    \item \textbf{Detailed balance}: Gibbs policies satisfy reversibility under equilibrium conditions (\Cref{prop:detailed-balance}).
    \item \textbf{Multi-timescale dynamics}: Spectral decomposition reveals hierarchy of relaxation modes (\Cref{prop:spectral-decomp}).
\end{itemize}

This mathematical framework justifies the continuous-time formulation in the main text and establishes that the Q-matrix structure with row-sum conservation is sufficient for probability normalization in hierarchical rule dynamics; it is also necessary, since conservation for an initial distribution concentrated on a single rule $r$ forces the $r$-th row sum to vanish.
